from typing import Final

import pytest

from inspect_ai._util.text import str_to_float
from inspect_ai.scorer._common import normalize_number
from inspect_ai.scorer._unicode import unicode_number_to_float


def test_str_to_float_basic():
    """Integer bases with a single superscript exponent, plus a bare integer."""
    expectations = {
        "1²": 1.0,
        "2³": 8.0,
        "5⁴": 625.0,
        "10⁰": 1.0,
        "3": 3.0,
    }
    for text, value in expectations.items():
        assert str_to_float(text) == value


def test_str_to_float_decimal_base():
    """A decimal base may carry a superscript exponent.

    2.5 ** 2 is exactly representable in binary floating point, so it is
    compared exactly. 0.1 ** 3 is not, so it is compared with
    ``pytest.approx`` to keep the test independent of how the power is
    computed internally.
    """
    assert str_to_float("2.5²") == 2.5**2
    assert str_to_float("0.1³") == pytest.approx(0.1**3)


def test_str_to_float_negative_base():
    """A leading minus belongs to the base: "-2²" is (-2) ** 2, not -(2 ** 2)."""
    for text, exponent in (("-2²", 2), ("-2³", 3)):
        assert str_to_float(text) == (-2) ** exponent


def test_str_to_float_multi_digit_exponent():
    """Consecutive superscript digits are read together as one exponent."""
    cases = [
        ("2⁴⁵", 2**45),
        ("3⁰⁰⁰", 3**0),  # Exponent is 0
    ]
    for text, value in cases:
        assert str_to_float(text) == value


def test_str_to_float_no_exponent():
    """Plain integer strings without any superscript convert directly."""
    for text, value in (("7", 7.0), ("0", 0.0)):
        assert str_to_float(text) == value


def test_str_to_float_no_base():
    """A string consisting only of a superscript exponent uses a base of 1.0."""
    # When the base is missing, default to 1.0
    implied_base = 1.0
    for text, exponent in (("⁵", 5), ("⁰", 0)):
        assert str_to_float(text) == implied_base**exponent


def test_str_to_float_zero_exponent():
    """Raising any base, including zero, to the superscript zero yields 1.0."""
    # 0^0 is considered 1 in this context
    for text in ("5⁰", "0⁰"):
        assert str_to_float(text) == 1.0


def test_str_to_float_invalid_input():
    """Non-numeric, empty, and unsupported-superscript inputs raise ValueError."""
    rejected = (
        "abc",
        "",
        "⁺²",  # Unsupported superscript characters
    )
    for text in rejected:
        with pytest.raises(ValueError):
            str_to_float(text)


def test_str_to_float_edge_cases():
    """A non-numeric base is rejected; a decimal base with an exponent is accepted."""
    # Base with unsupported characters
    with pytest.raises(ValueError):
        str_to_float("a²")

    # Superscript after decimal point
    result = str_to_float("2.5⁴")
    assert result == 2.5**4


def test_str_to_float_unicode_fractions():
    """Single vulgar-fraction characters convert to their numeric value.

    Halves, quarters and eighths are exactly representable in binary
    floating point and are compared exactly. Thirds are not, so they are
    compared with ``pytest.approx`` rather than relying on the implementation
    producing the same rounding as ``1 / 3``.
    """
    # Test simple fraction characters
    assert str_to_float("½") == 0.5
    assert str_to_float("¼") == 0.25
    assert str_to_float("¾") == 0.75
    assert str_to_float("⅓") == pytest.approx(1 / 3)
    assert str_to_float("⅔") == pytest.approx(2 / 3)

    # Test more complex fractions
    assert str_to_float("⅛") == 0.125
    assert str_to_float("⅜") == 0.375
    assert str_to_float("⅝") == 0.625
    assert str_to_float("⅞") == 0.875


def test_str_to_float_mixed_fractions():
    """A whole number followed by a vulgar fraction is the sum of the two.

    A negative sign applies to the whole mixed number ("-2½" is -2.5).
    The "3⅓" case is compared with ``pytest.approx`` because a third has no
    exact binary representation; the other values are exact.
    """
    # Whole number with fraction
    assert str_to_float("2½") == 2.5
    assert str_to_float("1¾") == 1.75
    assert str_to_float("3⅓") == pytest.approx(3 + 1 / 3)

    # Negative number with fraction
    assert str_to_float("-2½") == -2.5
    assert str_to_float("-1¼") == -1.25


def test_str_to_float_mixed_fractions_with_exponents():
    """Superscript exponents apply to the full (signed, mixed) fraction value."""
    cases = [
        # Fraction with exponent
        ("½²", 0.5**2),
        ("¾³", 0.75**3),
        # Fraction with multi-digit exponents
        ("½²³", 0.5**23),  # Interpreted as 0.5^23, not (0.5^2)^3
        ("¾³²", 0.75**32),  # Interpreted as 0.75^32, not (0.75^3)^2
        # Whole number, fraction, and exponent
        ("2½²", 2.5**2),
        ("1¾³", 1.75**3),
        # Negative number with fraction and exponent
        ("-2½²", (-2.5) ** 2),
        ("-1¼³", (-1.25) ** 3),
    ]
    for text, value in cases:
        assert str_to_float(text) == value


def test_str_to_float_fraction_invalid_input():
    """Malformed fraction strings raise ValueError."""
    rejected = (
        "½¾",  # Multiple fraction characters
        "a½",  # Invalid character before fraction
    )
    for text in rejected:
        with pytest.raises(ValueError):
            str_to_float(text)


def test_str_to_float_trailing_decimal_groups():
    """Content after the leading number (extra ".NN" groups or text) is ignored."""
    # Test the original case with many trailing decimal groups: a number
    # followed by a very long run of ".00" groups and a final ".0".
    # The input is generated instead of spelled out so the line stays readable;
    # the exact number of groups is not significant to the behaviour under test.
    many_groups = "31800" + ".00" * 256 + ".0"
    assert str_to_float(many_groups) == 31800.00

    cases = [
        # Test simpler cases with trailing decimal groups
        ("100.00.00", 100.00),
        ("42.5.5.5", 42.5),
        ("3.14159.265", 3.14159),
        # Test with trailing text after valid float
        ("123.45abc", 123.45),
        ("99.99xyz123", 99.99),
        ("0.5hello", 0.5),
        # Test negative numbers with trailing content
        ("-50.25.25", -50.25),
        ("-100.00.00.00", -100.00),
        ("-3.14extra", -3.14),
        # Test integers with trailing content
        ("42garbage", 42.0),
        ("100abc123", 100.0),
        ("-75text", -75.0),
        # Test edge cases
        ("0.0.0.0", 0.0),
        ("1.", 1.0),  # Valid float with trailing dot
        # Note: .5extra doesn't match the regex pattern ^([+-]?\d+(?:\.\d+)?)
        # Test with signs
        ("+123.45.67", 123.45),
        ("-987.65.43", -987.65),
    ]
    for text, value in cases:
        assert str_to_float(text) == value


def test_basic_ascii_and_fullwidth() -> None:
    """ASCII digits, fullwidth digits, and space-grouped comma-decimal input."""
    cases = [
        ("3", 3.0),
        ("４５．６", pytest.approx(45.6)),
        ("1 234,56", pytest.approx(1234.56)),
    ]
    for text, value in cases:
        assert unicode_number_to_float(text) == value


def test_grouping_and_decimal_heuristics() -> None:
    """Grouping separators are dropped and the decimal mark is recognised."""
    cases = [
        # Rightmost of dot/comma is decimal
        ("1,234.56", 1234.56),
        ("1.234,56", 1234.56),
        # NBSP / thin spaces as grouping
        ("12\u00a0345,67", 12345.67),
        ("12\u202f345,67", 12345.67),
        # Apostrophe-like grouping with comma decimal
        ("3’141’592,65", 3141592.65),
    ]
    for text, value in cases:
        assert unicode_number_to_float(text) == pytest.approx(value)


def test_unicode_digits_other_scripts() -> None:
    """Arabic-script digits with Arabic decimal/thousands separators are parsed."""
    cases = [
        # Arabic-Indic with Arabic decimal (U+066B) and thousands separator (U+066C)
        ("١٢٬٣٤٥٫٦٧", 12345.67),
        # Eastern Arabic-Indic alternative
        # NOTE(review): these digits appear to be from the same Arabic-Indic
        # block as the case above rather than the Extended (U+06Fx) block —
        # confirm which code points were intended.
        ("٤٥٫٦٧", 45.67),
    ]
    for text, value in cases:
        assert unicode_number_to_float(text) == pytest.approx(value)


def test_unicode_signs_and_toggling() -> None:
    """Unicode sign variants are honoured and repeated signs combine."""
    # U+2212 MINUS SIGN and fullwidth minus, multiple minus toggle
    cases = [
        ("−-+3", 3.0),  # two minuses overall → positive
        ("－3", -3.0),
        ("+＋3", 3.0),  # plus variants collapse
    ]
    for text, value in cases:
        assert unicode_number_to_float(text) == value


def test_single_char_numerics() -> None:
    """Single characters that carry a numeric value convert to that value."""
    # Roman numeral and vulgar fractions via unicodedata.numeric
    cases = [
        ("Ⅻ", 12.0),
        ("½", 0.5),
        ("⅔", pytest.approx(2 / 3)),
    ]
    for text, value in cases:
        assert unicode_number_to_float(text) == value


def test_fractions_in_base() -> None:
    """A vulgar fraction may follow a whole number and/or a sign."""
    cases = [
        ("2½", 2.5),
        ("−½", -0.5),
        ("12⅓", pytest.approx(12 + 1 / 3)),
    ]
    for text, value in cases:
        assert unicode_number_to_float(text) == value


def test_superscript_exponents() -> None:
    """Superscript digits (optionally signed) act as an exponent on the base."""
    cases = [
        ("3²", 9.0),
        ("1.5⁻³", pytest.approx(1.5**-3)),
        ("⅔³", pytest.approx((2 / 3) ** 3)),
        # Sign binds to base, then exponent applies
        ("-2²", 4.0),
        ("-2³", -8.0),
        ("-½²", pytest.approx(0.25)),
    ]
    for text, value in cases:
        assert unicode_number_to_float(text) == value


def test_scientific_notation_then_exponent() -> None:
    """Scientific notation forms the base before a superscript power is applied."""
    # ASCII e/E exponent is part of base; superscript exponent applies after
    cases = [
        ("1e3²", 1_000.0**2),
        ("1.2E3³", 1_200.0**3),
    ]
    for text, value in cases:
        assert unicode_number_to_float(text) == value


def test_chinese_numerals_basic() -> None:
    """Chinese numerals in everyday, financial, positional and shorthand forms."""
    cases = {
        "一百二十三": 123.0,
        "两千零三": 2003.0,
        "壹仟贰佰叁拾肆": 1234.0,
        "二〇二五": 2025.0,
        "十": 10.0,  # omitted '一十'
        "廿三": 23.0,
        "卌六": 46.0,
    }
    for text, value in cases.items():
        assert unicode_number_to_float(text) == value


def test_chinese_with_decimal_and_fraction() -> None:
    """Chinese numerals combined with a decimal part or a vulgar fraction."""
    cases = [
        ("一百二十三点四五", 123.45),
        ("十二⅓", 12 + 1 / 3),
    ]
    for text, value in cases:
        assert unicode_number_to_float(text) == pytest.approx(value)


def test_chinese_large_units() -> None:
    """Large Chinese unit characters (兆, 亿, 万) combine into a single value."""
    # 1兆 + 2345亿 + 6789万 + 1 = 1_234_567_890_001
    target: Final[float] = 1_234_567_890_001.0
    parsed = unicode_number_to_float("一兆二千三百四十五亿六千七百八十九万零一")
    assert parsed == target


def test_error_cases() -> None:
    """Inputs that are not a single well-formed number raise ValueError."""
    rejected = (
        "abc",
        "1x2",
        "1½⅓",  # multiple vulgar fraction chars
        "",  # empty
    )
    for text in rejected:
        with pytest.raises(ValueError):
            unicode_number_to_float(text)


def test_normalize_unicode_numeric():
    """normalize_number handles a non-ASCII numeric string without raising.

    NOTE(review): both sides of the comparison come from the same input, so
    this only checks that the call succeeds and gives the same result twice —
    consider asserting a concrete expected value.
    """
    first = normalize_number("三三")
    second = normalize_number("三三")
    assert first == second
